# Start from an empty global workspace.
rm(list = ls())

# Packages used by this script.
library(readxl) # replaces XLConnect
library(foreign)
library(car)

# Front matter: every file name used below is relative to this directory.
setwd("/Volumes/MONOGAN/psrsd/anxiety2016/data/anes_timeseries_2016_dta/")

###OPEN ENDED RESPONSE MANAGEMENT###
# Read the first sheet of the workbook, taking column names from the header
# row, and store the result as a plain data frame.
text <- as.data.frame(
  read_excel("anes2016presOnly.xlsx", sheet = 1, col_names = TRUE)
)
#names(text)
#head(text)
#tail(text)
#text=subset(text.0,select=c(caseid,candlik_likewhatdpc))

##Democratic likes##
# Look at the first raw answers and at the 20 most frequent values (NA is
# tallied as well), then turn the two non-response labels into missing values.
text$candlik_likewhatdpc[1:15]
rev(sort(table(text$candlik_likewhatdpc, useNA = "always")))[1:20]
text$candlik_likewhatdpc[text$candlik_likewhatdpc == "-1 Inapplicable"] <- NA
text$candlik_likewhatdpc[text$candlik_likewhatdpc == "-7 Refused"] <- NA
rev(sort(table(text$candlik_likewhatdpc, useNA = "always")))[1:20]

# Remove "no" in any letter case (meant to drop a closing "no" further
# comment). The pattern is unanchored, so it is deleted wherever it occurs,
# including inside longer words.
text$candlik_likewhatdpc <- gsub("no", "", text$candlik_likewhatdpc, ignore.case = TRUE)
head(text$candlik_likewhatdpc, 15)

# Turn the various delimiters into commas and delete spaces. The ". " rule
# has to run before the spaces are removed, because it relies on the space.
text$candlik_likewhatdpc <- gsub("[.] ", ",", text$candlik_likewhatdpc) # periods are used as delimiters in 2016
head(text$candlik_likewhatdpc, 15)
text$candlik_likewhatdpc <- gsub(" ", "", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
text$candlik_likewhatdpc <- gsub("//", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
text$candlik_likewhatdpc <- gsub("[\\]", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
text$candlik_likewhatdpc <- gsub("-/-", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
text$candlik_likewhatdpc <- gsub(";", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)

# Answers that are now empty, or that hold the literal string "NA" (a coding
# that is new in 2016), are treated as missing.
text$candlik_likewhatdpc[text$candlik_likewhatdpc == ""] <- NA
text$candlik_likewhatdpc[text$candlik_likewhatdpc == "NA"] <- NA

# Split on commas: one list element per respondent, one string per comment.
candlik_likewhatdpc.list <- strsplit(text$candlik_likewhatdpc, split = ",")
head(candlik_likewhatdpc.list, 15)

# Number of comments = number of pieces, minus one when the first piece is NA
# (a missing answer splits into a single NA piece and must count as zero).
count <- as.numeric(lengths(candlik_likewhatdpc.list))
count[1:15]
length(count)
empty <- as.numeric(
  vapply(candlik_likewhatdpc.list, function(pieces) is.na(pieces[[1]]), logical(1))
)
empty[1:15]
length(empty)
text$dem.like <- count - empty
head(text$candlik_likewhatdpc, 15)
head(text$dem.like, 15)
tail(text$candlik_likewhatdpc, 15)
tail(text$dem.like, 15)

##Democratic dislikes##
# Look at the first raw answers and at the 20 most frequent values (NA is
# tallied as well), then turn the two non-response labels into missing values.
text$candlik_dislwhatdpc[1:15]
rev(sort(table(text$candlik_dislwhatdpc, useNA = "always")))[1:20]
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc == "-1 Inapplicable"] <- NA
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc == "-7 Refused"] <- NA
rev(sort(table(text$candlik_dislwhatdpc, useNA = "always")))[1:20]

# Remove "no" in any letter case (meant to drop a closing "no" further
# comment). The pattern is unanchored, so it is deleted wherever it occurs,
# including inside longer words.
text$candlik_dislwhatdpc <- gsub("no", "", text$candlik_dislwhatdpc, ignore.case = TRUE)
head(text$candlik_dislwhatdpc, 15)

# Turn the various delimiters into commas and delete spaces. The ". " rule
# has to run before the spaces are removed, because it relies on the space.
text$candlik_dislwhatdpc <- gsub("[.] ", ",", text$candlik_dislwhatdpc) # periods are used as delimiters in 2016
head(text$candlik_dislwhatdpc, 15)
text$candlik_dislwhatdpc <- gsub(" ", "", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
text$candlik_dislwhatdpc <- gsub("//", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
text$candlik_dislwhatdpc <- gsub("[\\]", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
text$candlik_dislwhatdpc <- gsub("-/-", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
text$candlik_dislwhatdpc <- gsub(";", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)

# Answers that are now empty, or that hold the literal string "NA" (a coding
# that is new in 2016), are treated as missing. Both recodes must be applied
# to this same dislikes column; otherwise a literal "NA" would survive and be
# counted below as one comment.
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc == ""] <- NA
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc == "NA"] <- NA

# Split on commas: one list element per respondent, one string per comment.
candlik_dislwhatdpc.list <- strsplit(text$candlik_dislwhatdpc, split = ",")
head(candlik_dislwhatdpc.list, 15)

# Number of comments = number of pieces, minus one when the first piece is NA
# (a missing answer splits into a single NA piece and must count as zero).
count <- as.numeric(lengths(candlik_dislwhatdpc.list))
count[1:15]
length(count)
empty <- as.numeric(
  vapply(candlik_dislwhatdpc.list, function(pieces) is.na(pieces[[1]]), logical(1))
)
empty[1:15]
length(empty)
text$dem.dislike <- count - empty
head(text$candlik_dislwhatdpc, 15)
head(text$dem.dislike, 15)
tail(text$candlik_dislwhatdpc, 15)
tail(text$dem.dislike, 15)

##Republican likes##
# Look at the first raw answers and at the 20 most frequent values (NA is
# tallied as well), then turn the two non-response labels into missing values.
text$candlik_likewhatrpc[1:15]
rev(sort(table(text$candlik_likewhatrpc, useNA = "always")))[1:20]
text$candlik_likewhatrpc[text$candlik_likewhatrpc == "-1 Inapplicable"] <- NA
text$candlik_likewhatrpc[text$candlik_likewhatrpc == "-7 Refused"] <- NA
rev(sort(table(text$candlik_likewhatrpc, useNA = "always")))[1:20]

# Remove "no" in any letter case (meant to drop a closing "no" further
# comment). The pattern is unanchored, so it is deleted wherever it occurs,
# including inside longer words.
text$candlik_likewhatrpc <- gsub("no", "", text$candlik_likewhatrpc, ignore.case = TRUE)
head(text$candlik_likewhatrpc, 15)

# Turn the various delimiters into commas and delete spaces. The ". " rule
# has to run before the spaces are removed, because it relies on the space.
text$candlik_likewhatrpc <- gsub("[.] ", ",", text$candlik_likewhatrpc) # periods are used as delimiters in 2016
head(text$candlik_likewhatrpc, 15)
text$candlik_likewhatrpc <- gsub(" ", "", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
text$candlik_likewhatrpc <- gsub("//", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
text$candlik_likewhatrpc <- gsub("[\\]", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
text$candlik_likewhatrpc <- gsub("-/-", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
text$candlik_likewhatrpc <- gsub(";", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)

# Answers that are now empty, or that hold the literal string "NA" (a coding
# that is new in 2016), are treated as missing. Both recodes must be applied
# to this same Republican-likes column; otherwise a literal "NA" would survive
# and be counted below as one comment.
text$candlik_likewhatrpc[text$candlik_likewhatrpc == ""] <- NA
text$candlik_likewhatrpc[text$candlik_likewhatrpc == "NA"] <- NA

# Split on commas: one list element per respondent, one string per comment.
candlik_likewhatrpc.list <- strsplit(text$candlik_likewhatrpc, split = ",")
head(candlik_likewhatrpc.list, 15)

# Number of comments = number of pieces, minus one when the first piece is NA
# (a missing answer splits into a single NA piece and must count as zero).
count <- as.numeric(lengths(candlik_likewhatrpc.list))
count[1:15]
length(count)
empty <- as.numeric(
  vapply(candlik_likewhatrpc.list, function(pieces) is.na(pieces[[1]]), logical(1))
)
empty[1:15]
length(empty)
text$rep.like <- count - empty
head(text$candlik_likewhatrpc, 15)
head(text$rep.like, 15)
tail(text$candlik_likewhatrpc, 15)
tail(text$rep.like, 15)

##Republican dislikes##
# Look at the first raw answers and at the 20 most frequent values (NA is
# tallied as well), then turn the two non-response labels into missing values.
text$candlik_dislwhatrpc[1:15]
rev(sort(table(text$candlik_dislwhatrpc, useNA = "always")))[1:20]
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc == "-1 Inapplicable"] <- NA
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc == "-7 Refused"] <- NA
rev(sort(table(text$candlik_dislwhatrpc, useNA = "always")))[1:20]

# Remove "no" in any letter case (meant to drop a closing "no" further
# comment). The pattern is unanchored, so it is deleted wherever it occurs,
# including inside longer words.
text$candlik_dislwhatrpc <- gsub("no", "", text$candlik_dislwhatrpc, ignore.case = TRUE)
head(text$candlik_dislwhatrpc, 15)

# Turn the various delimiters into commas and delete spaces. The ". " rule
# has to run before the spaces are removed, because it relies on the space.
text$candlik_dislwhatrpc <- gsub("[.] ", ",", text$candlik_dislwhatrpc) # periods are used as delimiters in 2016
head(text$candlik_dislwhatrpc, 15)
text$candlik_dislwhatrpc <- gsub(" ", "", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
text$candlik_dislwhatrpc <- gsub("//", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
text$candlik_dislwhatrpc <- gsub("[\\]", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
text$candlik_dislwhatrpc <- gsub("-/-", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
text$candlik_dislwhatrpc <- gsub(";", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)

# Answers that are now empty, or that hold the literal string "NA" (a coding
# that is new in 2016), are treated as missing. The "NA" test has to look at
# this same Republican-dislikes column; otherwise a literal "NA" would survive
# and be counted below as one comment.
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc == ""] <- NA
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc == "NA"] <- NA

# Split on commas: one list element per respondent, one string per comment.
candlik_dislwhatrpc.list <- strsplit(text$candlik_dislwhatrpc, split = ",")
head(candlik_dislwhatrpc.list, 15)

# Number of comments = number of pieces, minus one when the first piece is NA
# (a missing answer splits into a single NA piece and must count as zero).
count <- as.numeric(lengths(candlik_dislwhatrpc.list))
count[1:15]
length(count)
empty <- as.numeric(
  vapply(candlik_dislwhatrpc.list, function(pieces) is.na(pieces[[1]]), logical(1))
)
empty[1:15]
length(empty)
text$rep.dislike <- count - empty
head(text$candlik_dislwhatrpc, 15)
head(text$rep.dislike, 15)
tail(text$candlik_dislwhatrpc, 15)
tail(text$rep.dislike, 15)

##Create the end count file##
# Keep the respondent identifier (V160001 is the "caseid") together with the
# four comment counts, and save them without row names.
count.cols <- c("V160001", "dem.like", "dem.dislike", "rep.like", "rep.dislike")
text.count <- subset(text, select = count.cols)
write.csv(text.count, "textCount2016.csv", row.names = FALSE)


###QUANTITATIVE RESPONSE MANAGEMENT###
# Load the Stata file, keeping the numeric codes rather than factor labels.
anes.0 <- read.dta("anes_timeseries_2016_Stata12.dta", convert.factors = FALSE)

# ANES variable code -> working name. The order of this vector is the column
# order of the subset.
anes.vars <- c(
  version = "version",
  V160001 = "caseid",
  V162034a = "postvote_presvtwho",
  V161116 = "dAngry",
  V161117 = "dHope",
  V161118 = "dAfraid",
  V161119 = "dProud",
  V161120 = "dDisgust",
  V161121 = "rAngry",
  V161122 = "rHope",
  V161123 = "rAfraid",
  V161124 = "rProud",
  V161125 = "rDisgust",
  V161158x = "pid_x",
  V161178 = "spsrvpr_ssself",
  V161179 = "spsrvpr_ssdpc",
  V161180 = "spsrvpr_ssrpc",
  V161181 = "defsppr_self",
  V161182 = "defsppr_dpc",
  V161183 = "defsppr_rpc",
  V161184 = "inspre_self",
  V161185 = "inspre_dpc",
  V161186 = "inspre_rpc",
  V161189 = "guarpr_self",
  V161190 = "guarpr_dpc",
  V161191 = "guarpr_rpc",
  V161198 = "aidblack_self",
  V161199 = "aidblack_dpc",
  V161200 = "aidblack_rpc",
  V161201 = "envjob_self",
  V161202 = "envjob_dpc",
  V161203 = "envjob_rpc",
  V161068 = "candlik_likedpc",
  V161071 = "candlik_disldpc",
  V161074 = "candlik_likerpc",
  V161077 = "candlik_dislrpc"
)

# Subset to the listed variables, then rename them to the working names.
anes.1 <- subset(anes.0, select = names(anes.vars))
names(anes.1) <- unname(anes.vars)

# The count file's first column is the same identifier; give it the same name
# so the two tables can be joined on it.
names(text.count)[1] <- "caseid"

# Merge: only rows whose caseid appears in both tables are kept.
anes <- merge(x = anes.1, y = text.count, by = "caseid")

#clean vote choice variable, subset to those who voted for D (coded #1) or R (coded #2)
#also, eliminate true independents (pid_x==4)
anes$postvote_presvtwho[anes$postvote_presvtwho %in% c(-9, -8, -7, -6, -1, 3, 4, 5, 7, 9)] <- NA
anes <- subset(anes, subset = !is.na(postvote_presvtwho) & pid_x != 4)

#data cleaning, any negative number is out, as these are all missing data codes
# The recode (negative values and 99 -> NA) is applied to every column except
# the respondent identifier and the four open-ended comment counts. Those are
# not coded survey items, so an identifier or count that happens to equal 99
# must not be blanked (the row would later be dropped by na.omit()).
not.items <- c("caseid", "dem.like", "dem.dislike", "rep.like", "rep.dislike")
item.cols <- setdiff(names(anes), not.items)
anes[item.cols][anes[item.cols] < 0] <- NA
anes[item.cols][anes[item.cols] == 99] <- NA
#summary(anes)
#table(anes<0)

#voted for candidate of own party, or of Republican
# own is 1 when the vote code is 2 and pid_x is above 4, or when the vote code
# is 1 and pid_x is below 4; otherwise 0 (NA when it cannot be determined).
anes$own <- as.numeric(
  (anes$postvote_presvtwho == 2 & anes$pid_x > 4) |
    (anes$postvote_presvtwho == 1 & anes$pid_x < 4)
)
# Vote codes 1/2 become 0/1, so 1 marks a vote for the Republican.
anes$vote.rep <- anes$postvote_presvtwho - 1

#rescale partisanship
# Min-max rescaling onto [0, 1]; missing values are ignored when finding the
# minimum and maximum and stay missing in the result.
unit.scale <- function(v) {
  lo <- min(v, na.rm = TRUE)
  hi <- max(v, na.rm = TRUE)
  (v - lo) / (hi - lo)
}
anes$pid_x <- unit.scale(anes$pid_x)

#issue advantage measure
# For each of the six issue scales take
#   |Democratic candidate - self| - |Republican candidate - self|
# and add the six differences up, in the order the scales are listed.
issue.advantage <- function(d) {
  prefixes <- c("spsrvpr_ss", "defsppr_", "inspre_", "guarpr_", "aidblack_", "envjob_")
  total <- NULL
  for (p in prefixes) {
    self <- d[[paste0(p, "self")]]
    dem.gap <- abs(d[[paste0(p, "dpc")]] - self)
    rep.gap <- abs(d[[paste0(p, "rpc")]] - self)
    total <- if (is.null(total)) dem.gap - rep.gap else total + dem.gap - rep.gap
  }
  total
}
anes$issues <- issue.advantage(anes)
anes$issues <- unit.scale(anes$issues)

#candidate personal quality measure
# Republican likes and Democratic dislikes count positively; Republican
# dislikes and Democratic likes count negatively.
anes$personal <- anes$rep.like + anes$dem.dislike - anes$rep.dislike - anes$dem.like

#Scale emotion coding to run on a 0 to 1 scale.
# (x - 1) / 4 maps a 1-to-5 code onto 0-to-1.
emotion.items <- c(
  "dAngry", "dHope", "dAfraid", "dProud", "dDisgust",
  "rAngry", "rHope", "rAfraid", "rProud", "rDisgust"
)
for (item in emotion.items) {
  anes[[item]] <- (anes[[item]] - 1) / 4
}

#candidate own emotions
# democrat is 1 when the rescaled pid_x is below .5. Each *.own column takes
# the "d" emotion item for those respondents and the "r" item for the rest.
anes$democrat <- as.numeric(anes$pid_x < .5)
own.emotion <- c(
  ang.own = "Angry", hp.own = "Hope", afr.own = "Afraid",
  prd.own = "Proud", disg.own = "Disgust"
)
for (own.name in names(own.emotion)) {
  emotion <- own.emotion[[own.name]]
  anes[[own.name]] <- ifelse(
    anes$democrat == 1,
    anes[[paste0("d", emotion)]],
    anes[[paste0("r", emotion)]]
  )
}

###Write Data###
# Keep the analysis variables only, then drop every row that has a missing
# value in any of them.
model.vars <- subset(
  anes,
  select = c(caseid, vote.rep, pid_x, issues, personal,
             ang.own, hp.own, afr.own, prd.own, disg.own)
)
anes <- na.omit(model.vars)
summary(anes$personal)

# Rescale "personal" to 0-1 only now, using the rows that survived the
# deletion above, as extreme values get stripped-out due to other missing
# variables.
personal.lo <- min(anes$personal, na.rm = TRUE)
personal.hi <- max(anes$personal, na.rm = TRUE)
anes$personal <- (anes$personal - personal.lo) / (personal.hi - personal.lo)
summary(anes)
dim(anes)

# Save the final analysis file without row names.
write.table(anes, "anes16.txt", row.names = FALSE)

###DESCRIPTIVES###
#anes<-read.table("anes16.txt",header=T)
# Drop the identifier so that only analysis variables are summarised.
anes <- subset(anes, select = -caseid)

# One row per variable; the columns are, in order, mean, standard deviation,
# minimum and maximum.
col.stat <- function(f) apply(anes, 2, f)
cbind(
  col.stat(mean),
  col.stat(sd),
  col.stat(min),
  col.stat(max)
)


